<?php

require_once "common.php";

// Contact-group label written into every exported row; fixed once at script start.
define('CONTACTS_GROUP',date('Y年m月d日H:i 导入数据'));
_Log('开始获取hc360的数据');


// Search-result listing URL; the page number is appended right after "ee=".
$pn_url = 'http://s.hc360.com/?w=%B9%AB%CB%BE&mc=enterprise&z=%D6%D0%B9%FA%3A%CB%C4%B4%A8%CA%A1%3A%B3%C9%B6%BC%CA%D0&ee=';

// URL of the previously requested listing page, handed to _request() as its
// second argument (presumably sent as the HTTP Referer - confirm in common.php).
$referer ='';
for($i=1;$i<1000;$i++){
    _Log("第{$i}页数据获取开始！",'note');
    $url = $pn_url.$i;
    _Log("发起请求到：{$url}",'note');
    $content = _request($url,$referer);
    $referer = $url;
    if(!$content){
        _Log("第{$i}页数据获取失败！",'error');
        continue;
    }
    _Log("第{$i}页数据获取成功！",'success');
    _Log("开始解析第{$i}页数据！",'note');
    // NOTE(review): a GBK -> UTF-8 iconv() conversion used to be applied here and
    // was disabled; confirm the encoding of what _request() returns.
    phpQuery::newDocumentHTML($content);
    $detail_pages = pq('.moreproduct a.tel');
    $detail_pages_size = $detail_pages->size();
    if($detail_pages_size < 1){
        // No detail links matched the selector: this is a failure, so log it as one.
        _Log("第{$i}页数据解析失败！",'error');
    }

    // Collect every detail URL before fetching any of them.
    // NOTE(review): presumably needed because loading a detail page below makes
    // it phpQuery's current document - confirm.
    $detail_urls = array();
    for($j=0;$j<$detail_pages_size;$j++){
        $detail_urls[] = $detail_pages->get($j)->getAttribute('href');
    }

    for($j=0;$j<$detail_pages_size;$j++){
        $detail_url =$detail_urls[$j];

        _Log("开始获取{$i}-{$j}详细页内容！",'note');
        // The listing page URL is passed along with each detail request.
        $detail_content = _request($detail_url,$url);

        if(!$detail_content){
            _Log("{$i}-{$j}详细页内容获取失败！",'error');
            continue;
        }
        _Log("详细页{$i}-{$j}内容获取成功！",'success');
        phpQuery::newDocumentHTML($detail_content);

        $tel = pq('#popLoginShow .tel02 ')->text();

        // Page title with all whitespace removed; used only in log messages.
        $title = pq('title')->text();
        $title = preg_replace('/\s+/','',$title);
        if($tel){
            // Pull every 11-digit run that does not start with 0 out of the phone text.
            $status = preg_match_all('/[1-9][0-9]{10}/is',$tel,$tels);

            if($status && $tels && isset($tels[0])){
                $tels = array_unique($tels[0]);
                _Log("开始抓取[{$title}]的联系信息",'note');
                $name =pq('.contactbox a span.bluezzbold ')->text();
                foreach ($tels as $number){
                    if(isTel($number)){
                        _Log("[{$title}]的联系信息获取成功！",'success');
                        _Log('抓取到号码：['.$name.']'. $number,'success');
                        write_link_to_xml($name,$number);
                    }else{
                        _Log("[{$title}]的联系信息获取失败！",'error');
                    }
                }
            }
        }else{
            // Fall through (no "continue") so the cleanup and throttle delay
            // below also run for pages without contact data.
            _Log("[{$title}]的联系信息不存在！",'error');
        }

        unset($detail_content);
        // NOTE(review): usleep() takes microseconds, so this pauses 0.1-1 ms;
        // confirm whether milliseconds were intended.
        usleep(rand(100,1000));
    }

    unset($detail_pages);
    unset($detail_urls);
    unset($content);
    // NOTE(review): same microsecond-scale delay as above.
    usleep(rand(100,1000));
    _Log("第{$i}页数据获取结束！",'note');
}



/**
 * Append one contact to the current CSV export file unless its number was already seen.
 *
 * Despite the name, the output is CSV. Files are written next to this script as
 * data/hc.<NOW_DATE>.contacts.<n>.csv. A new file (n + 1) is started once the
 * per-file row counter reaches 1000, and each file begins with a header row.
 * Counters are kept in static variables, so they persist across calls within
 * one script run only.
 *
 * Embedded double quotes in values are doubled so that a scraped name containing
 * '"' cannot break out of its quoted CSV cell.
 *
 * @param string $name Contact name; written to the first column.
 * @param string $tel  Phone number; written to the fifth column and used for
 *                     the duplicate check via has_tel().
 * @return void
 */
function write_link_to_xml($name,$tel){
    static $index = 1,$fIndex = 1;

    // has_tel() also records the number, so a second call with it is a no-op here.
    if(has_tel($tel)){
        return;
    }

    if($index>=1000){
        $fIndex ++;
        $index = 1;
    }
    $file = dirname(__FILE__).'/data/hc.'.NOW_DATE.'.contacts.'.$fIndex.'.csv';
    if($index ==1){
        // Header row; written without FILE_APPEND so it starts the file afresh.
        file_put_contents($file,"\"姓\",\"名\",\"前缀\",\"后缀\",\"其他电话\",\"qq同步助手 分组\",\n");
    }

    // Double any embedded quotes, then emit every cell quoted, keeping the
    // trailing comma the header row also uses.
    $cells = str_replace('"','""',array($name,'','','',$tel,CONTACTS_GROUP));
    $row = '"'.implode('","',$cells).'",'."\n";
    file_put_contents($file,$row,FILE_APPEND);
    $index++;
}


/**
 * Tell whether a phone number was already recorded, recording it if it was not.
 *
 * Each number is tracked by a marker file named after the number inside
 * data/repeat/<NOW_DATE>/ next to this script; the directory is created
 * (recursively, mode 0777) when missing.
 *
 * @param string $tel Phone number; used verbatim as the marker file name.
 * @return bool True if a marker file already existed, false after attempting
 *              to create one.
 */
function has_tel($tel){
    $markerDir = dirname(__FILE__).'/data/repeat/'.NOW_DATE.'/';
    if(is_dir($markerDir) === false){
        mkdir($markerDir,0777,true);
    }

    $marker = $markerDir.$tel;
    if(is_file($marker)){
        return true;
    }

    // First sighting: leave a marker so later calls report a duplicate.
    file_put_contents($marker,$tel);
    return false;
}